import csv
import json
import time
from http.client import RemoteDisconnected
from pathlib import Path
from urllib.error import URLError
from urllib.parse import urlencode
from urllib.request import Request, urlopen


def fetch_indicator(country_batch: list[str], indicator: str, year_from: int, year_to: int) -> list[dict]:
    """Fetch one World Bank indicator for a batch of countries.

    Sends a single GET request to the World Bank v2 API asking for JSON with
    ``per_page=20000`` and retries transient network failures with an
    exponential backoff (1s, 2s, 4s, ... capped at 30s).

    Args:
        country_batch: Country codes; joined with ``;`` into the URL path.
        indicator: Indicator code placed in the URL path.
        year_from: First year of the ``date=<year_from>:<year_to>`` parameter.
        year_to: Last year of the ``date=<year_from>:<year_to>`` parameter.

    Returns:
        The record list found in the second element of the JSON response.
        An empty list is returned when the response is not a list, has fewer
        than two elements, or its second element is not a list.

    Raises:
        RemoteDisconnected, URLError, TimeoutError: Re-raised once the final
            attempt has failed.
        ValueError: If the response body is not valid UTF-8 JSON (this is not
            retried here).
    """
    max_attempts = 7
    params = {
        "format": "json",
        "per_page": "20000",
        "date": f"{year_from}:{year_to}",
    }
    countries = ";".join(country_batch)
    url = f"https://api.worldbank.org/v2/country/{countries}/indicator/{indicator}?{urlencode(params)}"
    req = Request(url, headers={"User-Agent": "ess-research/1.0"})

    backoff = 1
    for attempt in range(1, max_attempts + 1):
        try:
            with urlopen(req, timeout=60) as resp:
                data = json.loads(resp.read().decode("utf-8"))
        except (RemoteDisconnected, URLError, TimeoutError):
            if attempt == max_attempts:
                raise
            time.sleep(backoff)
            backoff = min(backoff * 2, 30)
            continue

        # data[1] is expected to hold the records, but it is not guaranteed to
        # be a list (e.g. JSON null when nothing matches the query). Normalise
        # anything else to [] so callers can always iterate and concatenate.
        records = data[1] if isinstance(data, list) and len(data) > 1 else None
        return records if isinstance(records, list) else []

    # Defensive only: the loop above always returns or raises.
    raise RuntimeError("Unexpected retry loop fallthrough")


def fetch_indicator_recursive(countries: list[str], indicator: str, year_from: int, year_to: int) -> list[dict]:
    """Fetch an indicator, bisecting the country list whenever a request fails.

    The whole batch is tried first via ``fetch_indicator``. If that raises any
    ``Exception``, the batch is cut in half and each half is fetched
    recursively (first half, then second half); the two results are
    concatenated in that order. A failing batch of one country or fewer cannot
    be split further, so its exception propagates to the caller.
    """
    try:
        return fetch_indicator(countries, indicator, year_from, year_to)
    except Exception:
        if len(countries) > 1:
            half = len(countries) // 2
            first_part = fetch_indicator_recursive(countries[:half], indicator, year_from, year_to)
            second_part = fetch_indicator_recursive(countries[half:], indicator, year_from, year_to)
            return first_part + second_part
        # Nothing left to split: surface the original failure.
        raise


def main() -> None:
    """Download World Bank connectivity indicators and write a country-year CSV.

    Steps:
      1. Read country codes (one per line, blanks ignored) from
         ``outputs/ess_r8_r11_countries.txt`` under the project root, where the
         root is the parent of the directory containing this file.
      2. Fetch every indicator in ``indicators`` for 2016-2024 through
         ``fetch_indicator_recursive``, pausing 0.5s after each indicator.
      3. Merge the records into one row per (country, year).
      4. Write ``data_external/broadband_country_year_worldbank.csv`` sorted by
         country then year, and print a one-line summary.
    """
    root = Path(__file__).resolve().parents[1]
    out_dir = root / "data_external"
    out_dir.mkdir(parents=True, exist_ok=True)

    countries_path = root / "outputs" / "ess_r8_r11_countries.txt"
    countries = [c.strip() for c in countries_path.read_text(encoding="utf-8").splitlines() if c.strip()]

    # Restrict to the analysis window years (R8–R11 are within ~2016–2024).
    year_from, year_to = 2016, 2024

    # Output column name -> World Bank indicator code. This mapping is the
    # single source of truth for both the CSV header and the row contents.
    indicators = {
        "fixed_broadband_per100": "IT.NET.BBND.P2",
        "internet_users_pct": "IT.NET.USER.ZS",
        "mobile_cellular_per100": "IT.CEL.SETS.P2",
    }

    # Store as (cntry, year) -> dict of values
    values: dict[tuple[str, int], dict] = {}
    for out_field, ind_code in indicators.items():
        rows = fetch_indicator_recursive(countries, ind_code, year_from, year_to)
        for r in rows:
            cntry = (r.get("country") or {}).get("id")
            year_s = r.get("date")
            if not cntry or not year_s:
                continue
            try:
                year = int(year_s)
            except (TypeError, ValueError, OverflowError):
                # Skip records whose date is not a plain integer year.
                continue
            record = values.setdefault((cntry, year), {"cntry": cntry, "year": year})
            record[out_field] = r.get("value")
        time.sleep(0.5)

    out_path = out_dir / "broadband_country_year_worldbank.csv"
    rows_out = sorted(values.values(), key=lambda d: (d["cntry"], d["year"]))
    fieldnames = ["cntry", "year", *indicators, "source", "notes"]
    with out_path.open("w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=fieldnames)
        w.writeheader()
        for r in rows_out:
            row = {"cntry": r["cntry"], "year": r["year"]}
            # Indicators never seen for this country-year are written as "".
            row.update((field, r.get(field, "")) for field in indicators)
            row["source"] = "World Bank API (WDI)"
            row["notes"] = "Country-year proxies; mobile_cellular is not mobile broadband."
            w.writerow(row)

    print(f"Wrote: {out_path} rows={len(rows_out)}")


# Run the download only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
